import mlflow
import pandas as pd

def generate_recommendations_with_comparision(
    experiment_ids,
    aggregation_function="common_features",
    main_note="sizes_acts",
    note_to_compare="sizes_L2_without_acts",
    group_type="sim"
):
    """Build a table comparing two sets of MLflow runs metric by metric.

    Runs are fetched per experiment and kept only when their
    ``SAE_fusion_strategy`` param equals *aggregation_function*, their
    ``group_type`` param equals *group_type*, and their ``note`` param is
    either *main_note* or *note_to_compare*. Kept runs are grouped by
    ``(dataset, embedding_dim, top_k)``; for every metric the "main" value is
    reported next to its relative change against the "compare" value.

    Args:
        experiment_ids: Iterable of MLflow experiment ids to search.
        aggregation_function: Required value of the ``SAE_fusion_strategy``
            run param.
        main_note: ``note`` value of the runs whose metrics are reported.
        note_to_compare: ``note`` value of the runs used as the baseline.
        group_type: Required value of the ``group_type`` run param.

    Returns:
        A ``pandas.DataFrame`` sorted by Dataset, Dimensions and TopK (moved
        into ordinary columns by ``reset_index``) with two-level metric
        columns ``(metric, "main")`` and ``(metric, "% change")``, rounded to
        two decimals. ``% change`` is ``(main - compare) / |compare| * 100``.
        When no run matches, an empty frame holding only the ``Dataset``,
        ``Dimensions`` and ``TopK`` columns is returned.
    """
    index_names = ["Dataset", "Dimensions", "TopK"]

    # Output label -> MLflow metric key.
    # NOTE(review): "G/mean" is read from a ".../median" metric — confirm
    # whether the label or the metric key is the intended one.
    metric_sources = {
        "G/mean": "CommonItemsNDCG20/median",
        "U/mean": "NDCG20/mean",
        "U/min": "NDCG20/min",
        "Pop": "Popularity/mean",
    }

    records = {}
    for exp_id in experiment_ids:
        runs = mlflow.search_runs(
            experiment_ids=[exp_id],
            output_format="list"
        )

        for run in runs:
            params = run.data.params
            if params.get("SAE_fusion_strategy") != aggregation_function:
                continue
            if params.get("group_type") != group_type:
                continue

            note = params.get("note")
            if note == main_note:
                model_type = "main"
            elif note == note_to_compare:
                model_type = "compare"
            else:
                continue

            # Runs without a "dataset" param fall back to an experiment label.
            row_key = (
                params.get("dataset", f"Exp-{exp_id}"),
                int(params.get("embedding_dim", 0)),
                int(params.get("top_k", 0)),
            )
            run_metrics = run.data.metrics
            # A later run with the same key and role overwrites an earlier one.
            records.setdefault(row_key, {}).update(
                {
                    (label, model_type): run_metrics.get(source)
                    for label, source in metric_sources.items()
                }
            )

    if not records:
        # Nothing matched: an empty frame has a single-level index, so naming
        # three levels below would raise. Return an empty table instead.
        return pd.DataFrame(columns=index_names)

    df = pd.DataFrame.from_dict(records, orient="index")
    # Metrics a run did not log come back as None; coerce them to NaN so the
    # arithmetic below cannot fail on an all-None (object) column.
    df = df.apply(pd.to_numeric, errors="coerce")
    df.index.names = index_names

    # Calculate % differences; metrics that only have "compare" values are dropped.
    result_cols = []
    for metric in sorted({col[0] for col in df.columns}):
        main_col = (metric, "main")
        compare_col = (metric, "compare")
        if main_col not in df.columns:
            continue

        result_cols.append(main_col)
        if compare_col in df.columns:
            percent_col = (metric, "% change")
            df[percent_col] = (
                (df[main_col] - df[compare_col]) / df[compare_col].abs()
            ) * 100
            result_cols.append(percent_col)

    # Keep only main and percent change columns, ordered by metric name.
    df = df[result_cols].round(2)
    df = df.sort_index(axis=1, level=0).sort_values(by=index_names)

    return df.reset_index()

Normalized embeddings

Jedna z veci, ktere mohou v modelech nastat, je, ze velikost sparse embeddingu muze mit mezi cleny skupiny odlisnou distribuci. Jinymi slovy, nekdo muze mit vetsi hodnoty embeddingu nez nekdo jiny. Pri agregaci by to pak znamenalo, ze nekteri uzivatele budou vysledny embedding ovlivnovat vice nez jini. To muze byt problem, pokud chceme, aby vysledky byly fer pro vsechny uzivatele.

Pojdme nejdrive prozkoumat, zda takovy jev opravdu nastava. Podivame se na distribuci sumy hodnot v embeddingu na vzorku 5000 uzivatelu. Opet vezmeme stejny priklad jako minule, tedy dimenzi 2048 a topk 64. Jak je videt na grafu, histogram sum embeddingu uzivatelu tvori normalni rozdeleni. Ne vsichni uzivatele tedy maji stejne hodnoty a normalizace by mohla pomoci k tomu, aby doporuceni byla vuci vsem uzivatelum ferovejsi.

Nyni se jeste pojdme podivat na graf, pokud vypneme normalizaci. Jak je videt, zde uz se nejedna o ciste normalni rozdeleni, ale ocasek u vetsich hodnot je mnohem vyraznejsi. I zde by mohla normalizace pomoci, aby uzivatele s vetsim embeddingem nebyli preferovani oproti ostatnim.

Normalizace, kterou chceme pouzit, spociva v tom, ze embedding vydelime jeho L2 normou a vysledek pronasobime prumernou hodnotou embeddingu. Normalizace tedy bude vypadat takto:

\begin{equation*} \text{normalized\_embedding} = \frac{\text{embedding}}{\|\text{embedding}\|_2} \cdot \text{mean}(\text{embedding}) \end{equation*}

Podivejme se tedy, jak vypadaji normalizovana doporuceni. Nejdrive pro common features bez aktivace.

SAE group recommendation performance for common features aggregation function and similar groups

Comparison of the base model and the model with normalized embeddings

# Compare runs whose "note" is "sizes_L2_without_acts_normalized" (main) against
# runs noted "sizes_L2_without_acts", restricted to the "common_features"
# fusion strategy and "sim" groups, across the two listed MLflow experiment ids.
# The call is left as a bare trailing expression; the table printed after it
# is that returned DataFrame.
experiment_ids = ['333391697323445885', '523100174176986081']
generate_recommendations_with_comparision(
    experiment_ids,
    aggregation_function="common_features",
    main_note="sizes_L2_without_acts_normalized",
    note_to_compare="sizes_L2_without_acts",
    group_type="sim"
)
Dataset Dimensions TopK G/mean Pop U/mean U/min
% change main % change main % change main % change main
0 LastFM1k 1024 32 0.00 0.59 0.05 0.62 -0.06 0.79 -0.09 0.63
1 LastFM1k 1024 64 0.00 0.58 -0.15 0.61 0.05 0.80 -0.04 0.63
2 LastFM1k 1024 128 0.00 0.60 0.00 0.60 0.95 0.81 0.18 0.63
3 LastFM1k 2048 32 0.00 0.59 -0.20 0.64 -0.04 0.79 0.61 0.62
4 LastFM1k 2048 64 0.00 0.60 0.00 0.62 0.41 0.81 1.02 0.64
5 LastFM1k 2048 128 0.00 0.61 0.00 0.61 0.38 0.81 -0.08 0.61
6 LastFM1k 4096 32 0.00 0.57 0.00 0.66 0.00 0.78 -0.03 0.63
7 LastFM1k 4096 64 0.00 0.52 0.00 0.63 -0.01 0.81 -0.03 0.62
8 LastFM1k 4096 128 0.00 0.59 0.00 0.62 -0.07 0.81 -3.26 0.61
9 MovieLens 1024 32 0.00 0.58 -1.02 0.51 0.86 0.65 -0.20 0.52
10 MovieLens 1024 64 -0.23 0.54 0.00 0.50 0.17 0.66 -0.07 0.53
11 MovieLens 1024 128 0.00 0.65 -0.59 0.49 0.14 0.67 -0.21 0.51
12 MovieLens 2048 32 0.00 0.68 0.23 0.49 -0.12 0.65 -0.66 0.54
13 MovieLens 2048 64 0.00 0.45 -0.98 0.48 0.18 0.65 0.04 0.53
14 MovieLens 2048 128 0.00 0.51 1.18 0.48 0.09 0.67 0.46 0.53
15 MovieLens 4096 32 0.87 0.67 0.43 0.49 -0.00 0.66 -1.32 0.54
16 MovieLens 4096 64 0.00 0.59 1.07 0.51 0.00 0.66 0.57 0.54
17 MovieLens 4096 128 0.34 0.57 0.02 0.49 0.48 0.65 -0.35 0.51

Jak je videt, tato zmena je naprosto minimalni a nedochazi k zadne vyznamne zmene v doporucenich. Nyni se podivejme na average s aktivaci.

SAE group recommendation performance for average aggregation function and similar groups

Comparison of the base model and the model with normalized embeddings

# Compare runs whose "note" is "sizes_L2_with_acts_normalized" (main) against
# runs noted "sizes_L2_with_acts", restricted to the "average" fusion strategy
# and "sim" groups, across the two listed MLflow experiment ids.
# The call is left as a bare trailing expression; the table printed after it
# is that returned DataFrame.
experiment_ids = ['333391697323445885', '523100174176986081']
generate_recommendations_with_comparision(
    experiment_ids,
    aggregation_function="average",
    main_note="sizes_L2_with_acts_normalized",
    note_to_compare="sizes_L2_with_acts",
    group_type="sim"
)
Dataset Dimensions TopK G/mean Pop U/mean U/min
% change main % change main % change main % change main
0 LastFM1k 1024 32 0.00 0.52 0.92 0.61 -0.04 0.81 1.76 0.65
1 LastFM1k 1024 64 0.00 0.56 -0.28 0.61 0.05 0.81 -2.11 0.63
2 LastFM1k 1024 128 -0.24 0.57 0.01 0.61 0.15 0.81 -0.21 0.65
3 LastFM1k 2048 32 -0.48 0.58 -0.02 0.62 0.04 0.82 -1.85 0.63
4 LastFM1k 2048 64 0.00 0.58 0.11 0.61 -0.33 0.82 -1.47 0.66
5 LastFM1k 2048 128 -0.37 0.64 0.71 0.60 -0.06 0.82 -3.46 0.64
6 LastFM1k 4096 32 0.00 0.59 1.41 0.62 0.20 0.82 -0.10 0.65
7 LastFM1k 4096 64 -0.76 0.58 0.50 0.61 -0.03 0.82 -1.16 0.64
8 LastFM1k 4096 128 -0.29 0.62 -0.11 0.61 -0.67 0.81 -0.80 0.64
9 MovieLens 1024 32 -3.73 0.57 0.44 0.53 -0.09 0.69 -0.08 0.58
10 MovieLens 1024 64 -7.74 0.69 -0.14 0.54 -0.53 0.69 0.36 0.58
11 MovieLens 1024 128 0.00 0.66 0.27 0.54 -0.56 0.69 -0.01 0.58
12 MovieLens 2048 32 8.12 0.71 -0.82 0.54 0.05 0.69 -0.14 0.58
13 MovieLens 2048 64 -7.31 0.62 -0.11 0.53 -0.52 0.69 -0.38 0.58
14 MovieLens 2048 128 0.00 0.70 1.32 0.53 0.18 0.69 0.20 0.58
15 MovieLens 4096 32 10.06 0.63 -0.86 0.54 0.36 0.68 0.06 0.56
16 MovieLens 4096 64 6.14 0.73 -0.64 0.54 1.35 0.69 -1.36 0.58
17 MovieLens 4096 128 5.80 0.72 1.28 0.54 -0.98 0.69 -1.30 0.58